Here is the report of the network analysis of the crop health data.
library(dplyr)
library(plyr)
library(reshape)
library(reshape2)
library(gridExtra)
library(lubridate)
library(doBy)
library(cluster)
library(ggplot2)
library(scales)
library(bioDist)
library(vegan)
library(mvtnorm)
library(igraph)
library(WGCNA)
## ==========================================================================
## *
## * Package WGCNA 1.47 loaded.
## *
## * Important note: It appears that your system supports multi-threading,
## * but it is not enabled within WGCNA in R.
## * To allow multi-threading within WGCNA with all available cores, use
## *
## * allowWGCNAThreads()
## *
## * within R. Use disableWGCNAThreads() to disable threading if necessary.
## * Alternatively, set the following environment variable on your system:
## *
## * ALLOW_WGCNA_THREADS=<number_of_processors>
## *
## * for example
## *
## * ALLOW_WGCNA_THREADS=4
## *
## * To set the environment variable in linux bash shell, type
## *
## * export ALLOW_WGCNA_THREADS=4
## *
## * before running R. Other operating systems or shells will
## * have a similar command to achieve the same aim.
## *
## ==========================================================================
library(cowplot)
library(NMF)
Load the survey data
library(RCurl) # provides getURL() for fetching the CSV over HTTPS

# Fetch the published Google Sheets CSV as raw text, then parse it.
file <- getURL("https://docs.google.com/spreadsheets/d/1zB7gNdI7Nk7SuHuPWcjzaKnjuwkvL6sOVMo0zMfuV-c/pub?gid=558862364&single=true&output=csv")
data <- read.csv(text = file)

# Both "-" and the empty string mark missing values in the raw survey;
# recode them to NA in a single elementwise pass (cells that are already
# NA stay NA, exactly as with two separate replacements).
data[data == "-" | data == ""] <- NA

#==== to lower variable names ====
# Lower-case all column names for consistency.
names(data) <- tolower(names(data))
Select and remove the columns that are not included in the analysis.
# Remove the variables that are not included in the analysis:
#   phase      - only one phase occurs in this survey
#   identifier - bookkeeping id, not analyzed
#   village    - village name
#   year/season - temporal descriptors, excluded
#   lat/long   - coordinates, excluded
#   fa         - field area, not included
#   fn         - farmer name, cannot be analyzed
#   fp         - meaning unknown -- TODO confirm with the data dictionary
#   lfm        - only one land form occurs in this survey
#   ced/cedjul - crop-establishment dates (dates excluded from network analysis)
#   hd/hdjul   - harvest dates, likewise excluded
#   cvr        - crop varieties (captured by vartype instead)
#   varcoded   - recoded by hand later
#   fymcoded   - coded version of fym, superseded
#   mu         - molluscicide, no records
#   nplsqm     - plants per square metre, excluded
#   rbpx       - rice bug, no records
unused_cols <- c(
  "phase", "identifier", "village", "year", "season", "lat", "long",
  "fa", "fn", "fp", "lfm", "ced", "cedjul", "hd", "hdjul", "cvr",
  "varcoded", "fymcoded", "mu", "nplsqm", "rbpx"
)
# data[[col]] <- NULL deletes the column (a silent no-op if it is absent,
# matching the behavior of the original data$col <- NULL statements).
for (col in unused_cols) {
  data[[col]] <- NULL
}
#==== correct the variable types =====
# Re-type every retained survey variable: categorical descriptors become
# factors, counts and measurements become numeric.  fym stays character
# here because it is recoded to 0/1 further below.
# BUG FIX: nlhmax was previously assigned as.numeric(nltmax) (copy-paste
# error), silently duplicating the nltmax column; it now converts nlhmax.
data <- transform(data,
                  country = as.factor(country),
                  pc = as.factor(pc),
                  cem = as.factor(cem),
                  ast = as.factor(ast),
                  ccd = as.numeric(ccd),
                  vartype = as.factor(vartype),
                  fym = as.character(fym),
                  n = as.numeric(n),
                  p = as.numeric(p),
                  k = as.numeric(k),
                  mf = as.numeric(mf),
                  wcp = as.factor(wcp),
                  iu = as.numeric(iu),
                  hu = as.numeric(hu),
                  fu = as.numeric(fu),
                  cs = as.factor(cs),
                  ldg = as.numeric(ldg),
                  yield = as.numeric(yield),
                  dscum = as.factor(dscum),
                  wecum = as.factor(wecum),
                  ntmax = as.numeric(ntmax),
                  npmax = as.numeric(npmax),
                  nltmax = as.numeric(nltmax),
                  nlhmax = as.numeric(nlhmax),
                  waa = as.numeric(waa),
                  wba = as.numeric(wba),
                  dhx = as.numeric(dhx),
                  whx = as.numeric(whx),
                  ssx = as.numeric(ssx),
                  wma = as.numeric(wma),
                  lfa = as.numeric(lfa),
                  lma = as.numeric(lma),
                  rha = as.numeric(rha),
                  thrx = as.numeric(thrx),
                  pmx = as.numeric(pmx),
                  defa = as.numeric(defa),
                  bphx = as.numeric(bphx),
                  wbpx = as.numeric(wbpx),
                  awx = as.numeric(awx),
                  rbx = as.numeric(rbx),
                  rbbx = as.numeric(rbbx),
                  glhx = as.numeric(glhx),
                  stbx = as.numeric(stbx),
                  hbx = as.numeric(hbx),
                  bbx = as.numeric(bbx),
                  blba = as.numeric(blba),
                  lba = as.numeric(lba),
                  bsa = as.numeric(bsa),
                  blsa = as.numeric(blsa),
                  nbsa = as.numeric(nbsa),
                  rsa = as.numeric(rsa),
                  lsa = as.numeric(lsa),
                  shbx = as.numeric(shbx),
                  shrx = as.numeric(shrx),
                  srx = as.numeric(srx),
                  fsmx = as.numeric(fsmx),
                  nbx = as.numeric(nbx),
                  dpx = as.numeric(dpx),
                  rtdx = as.numeric(rtdx),
                  rsdx = as.numeric(rsdx),
                  gsdx = as.numeric(gsdx),
                  rtx = as.numeric(rtx)
)
The data are now in the right format and ready for further analysis, but some variables still need to be coded as numbers rather than characters.
Before performing the cluster analysis that follows, I need to recode these character variables as numbers.
If the previous crop was rice, it is coded as 1; otherwise (not rice) it is coded as 0.
data$pc <- ifelse(data$pc == "rice", 1, 0)
Transplanted rice is coded as 1, and direct-seeded rice as 2.
# Crop establishment method: transplanted ("trp"/"TPR") -> 1,
# direct-seeded ("DSR"/"dsr") -> 2.  Relabelling the factor levels in
# place keeps every observation; only the level labels change.
cem_codes <- c(trp = "1", TPR = "1", DSR = "2", dsr = "2")
for (lab in names(cem_codes)) {
  levels(data$cem)[levels(data$cem) == lab] <- cem_codes[[lab]]
}
For fym there are two codes, 0 and 1. The raw data are recorded as "no", "yes", or a value: "no" or a value of 0 is coded as 0, and any value greater than 0 is coded as 1.
# Farmyard manure: raw answers are "no", "0", or a positive amount.
# "no"/"0" -> 0, any positive amount -> 1; NA stays NA (the `|` of two
# NA comparisons is NA, matching the original nested ifelse).
data$fym <- ifelse(data$fym == "no" | data$fym == "0", 0, 1)
For vartype there are three types: traditional varieties are coded as 1, modern varieties as 2, and hybrids as 3.
# Variety type: traditional ("tv") -> 1, modern ("mv") -> 2,
# hybrid ("hyb") -> 3; anything else (including NA) -> NA.
# A named lookup vector replaces the nested ifelse chain.
vartype_codes <- c(tv = 1, mv = 2, hyb = 3)
data$vartype <- unname(vartype_codes[as.character(data$vartype)])
Weed management practice has three types: hand weeding coded as 1, herbicide coded as 2, and herbicide plus hand weeding coded as 3.
# wcp, weed control practice: hand weeding -> 1, herbicide -> 2,
# herbicide plus hand weeding -> 3 (factor levels relabelled in place).
wcp_codes <- c("hand" = "1", "herb" = "2", "herb-hand" = "3")
for (lab in names(wcp_codes)) {
  levels(data$wcp)[levels(data$wcp) == lab] <- wcp_codes[[lab]]
}
Crop status has five levels: very poor coded as 1, poor as 2, average as 3, good as 4, and very good as 5.
# Crop status: ordinal scale from "very poor" (1) to "very good" (5).
# Processing "very poor" before "poor" matters: once "very poor" has been
# relabelled, the literal "poor" match hits only the intended level.
cs_codes <- c("very poor" = "1", "poor" = "2", "average" = "3",
              "good" = "4", "very good" = "5")
for (lab in names(cs_codes)) {
  levels(data$cs)[levels(data$cs) == lab] <- cs_codes[[lab]]
}
All remaining variables should now be numeric.
#==== clean the data ====
# Convert every column except fno (col 1) and country (col 2) to numeric.
# NOTE: apply() first coerces the data frame to a character matrix, so
# factor columns are converted via their labels (not their level codes),
# which is exactly what the level-relabelling above relies on.
num.data <- apply(data[, -c(1, 2)], 2, as.numeric)
num.data <- as.data.frame(num.data) # apply() returns a matrix; back to a data frame
data <- cbind(data[, c("fno", "country")], num.data)

# Drop zero-variance columns.
# BUG FIX: the variance mask is computed on data[, -c(1, 2)] (ncol - 2
# entries) but was previously used to index the full data frame, so the
# logical vector silently recycled and the wrong columns were kept or
# dropped.  Pad the mask with TRUE for fno and country instead.
keep <- apply(data[, -c(1, 2)], 2, var, na.rm = TRUE) != 0
data <- data[, c(TRUE, TRUE, keep)]

# Keep only complete rows: the clustering and network steps cannot handle NA.
data <- data[complete.cases(data), ]
Cluster analyses using a nearest neighbor and a chi-square distance (15) were performed in subsets representing each site, in order to determine site-specific patterns of cropping practices and injury profiles. Clusters including less than n = 3 fields were disregarded from further steps. Cluster analysis can be applied to categorized data, and in this case with a chi-square distance.
Cluster analysis allows groups of plots sharing similar characteristics to be identified. It was used here to define clusters of injury types in a way similar to that used in the analysis of surveys. One feature of this technique, when applied to results from factorially arranged experiments, is that it generates clusters in which one injury type often tends to predominate. One result of cluster analysis is that the inclusion of an individual in a particular cluster can be seen as a new, synthetic variable. Each plot can therefore be described as an element of a given cluster of injury types, in which one injury often predominates.
# Production-situation (PS) cluster analysis: the block of columns from
# "pc" through "fu" describes the cropping practices of each field.
start.PS <- "pc"
end.PS <- "fu"
start.col.PS <- match(start.PS, names(data))
end.col.PS <- match(end.PS, names(data))
# column 1 is fno (field id); keep it alongside the PS variables
PS.data <- data[, c(1, start.col.PS:end.col.PS)]
# distance matrix via daisy() from the cluster package.  With no explicit
# metric, daisy treats every (numeric) column as interval-scaled, i.e.
# Euclidean distance, and warns about the 0/1-coded columns -- hence the
# warning below.  The Gower metric would only be selected automatically
# for genuinely mixed-type input.
dist.PS <- daisy(PS.data[-1])
## Warning in daisy(PS.data[-1]): binary variable(s) 1, 2, 6 treated as
## interval scaled
# agglomerative hierarchical clustering, average linkage (UPGMA)
cluster.PS <- hclust(dist.PS, method = "average")
dendro.PS <- as.dendrogram(cluster.PS)
plot(dendro.PS, center = T, nodePar = list(lab.cex = 0.6, lab.col = "black", pch = NA), main = "Dendrogram for Production situation")
# draw rectangles around the k = 2 clusters
rect.hclust(tree = cluster.PS, k=2, border = c("red", "blue"))
The heatmap and dendrogram below show that nitrogen is the main variable separating the observations into two groups.
# Heatmap of the production-situation variables (fno and mf excluded),
# rows/columns clustered with daisy dissimilarity and average linkage.
# The resulting dendrogram suggests nitrogen (n) drives the two-group split.
aheatmap(subset(PS.data, select = -c(fno, mf)), distfun = daisy, hclustfun = "average")
## Warning in distfun(mat): binary variable(s) 1, 2, 6 treated as interval
## scaled
The total number of survey observations is 420. Using many clusters would reduce the reliability of the results, because the number of observations per cluster would be small, so we decided to cluster into two groups.
#==== number of members in each cluster ====
# Cut the dendrogram into k = 2 clusters; PS.no assigns each field to its
# production-situation cluster (1 or 2).
PS.no <- cutree(cluster.PS, k = 2)
# Cophenetic distances implied by the dendrogram (inputs to the
# correlation check kept below for reference).
rcluster.PS <- cophenetic(cluster.PS)
#cor(dist.PS, rcluster.PS) # The cophenetic correlation measures how faithfully
# the dendrogram preserves the original pairwise distances; it should be
# close to 1 for a high-quality solution.
data <- cbind(data, PS.no) # append the cluster membership to the data set
data$PS.no <- as.factor(data$PS.no) # treat the cluster id as a factor
# Bar chart of the number of observations per country.
# BUG FIX: geom_histogram(weights = count) referenced an undefined object
# (`count`), and `weights` is not a geom_histogram argument; for a
# discrete x a count chart is geom_bar().  The in-bar labels come from
# stat "count" (stat_bin no longer accepts discrete data).
g <- ggplot(data, aes(x = country)) +
  geom_bar() +
  geom_text(stat = "count", aes(label = ..count..), vjust = 1.5, color = "white") +
  scale_y_continuous(limits = c(0, 125), breaks = NULL) +
  theme(legend.position = "none") +
  ggtitle("The no of observation of dataset in each country")
print(g)
## ymax not defined: adjusting position using y instead
# Split the data into one data frame per country, in this fixed order.
name.country <- c("IDN", "IND", "THA", "VNM", "PHL") #sort(as.vector(unique(groups)))
by.country.data <- lapply(name.country, function(cty) {
  data %>% filter(country == cty)
})
# One bar chart of PS-cluster membership per country.
# NOTE(review): par(mfrow = ...) only affects base graphics, not ggplot2,
# so each plot still prints on its own page; kept for compatibility with
# the base plots elsewhere in the report.
par(mfrow = c(1, 2))
# BUG FIX: geom_histogram(weights = count) referenced an undefined object
# (`count`); PS.no is a factor, so the count chart is geom_bar().
for (i in seq_along(name.country)) {
  g <- ggplot(by.country.data[[i]], aes(x = PS.no)) +
    geom_bar() +
    ggtitle(paste("Histogram PS cluster of", name.country[i])) +
    theme(legend.position = "none")
  print(g)
}
#==== The profile of the PS clusters ====
# Keep country, the production-situation variables, and PS.no by dropping
# fno (col 1) and the injury columns (17:56).
# NOTE(review): the 17:56 range is hard-coded -- confirm it still matches
# the column layout after the zero-variance filter above.
clus.PS.data <- data[, -c(1, 17:56)]
# Long format: country and PS.no remain as id variables, every other
# column becomes (variable, value) pairs.
m.PS.data <- melt(clus.PS.data)
## Using country, PS.no as id variables
name.variable <- levels(m.PS.data$variable) # names of the measured variables
name.PS.no <- levels(m.PS.data$PS.no) # cluster labels ("1", "2")
# Check the profile of each cluster: one percentage histogram per
# (variable, PS-cluster) combination.
# NOTE(review): layout() only affects base graphics; each ggplot still
# prints on its own page under knitr regardless of this call.
layout(matrix(c(1:14), 14, 2, byrow = TRUE))
for(i in 1:length(name.variable)) {
# all long-format rows for the i-th variable
subtemp <- subset(m.PS.data, variable == name.variable[i])
for(j in 1: length(name.PS.no)){
# ... restricted to the j-th PS cluster
subtemp1 <- subtemp %>% filter(PS.no == name.PS.no[j])
# bar heights as a fraction of the cluster's observations (y axis in %)
g <- ggplot(subtemp1, aes(x = value)) +
geom_bar(aes(y = (..count..)/sum(..count..)), binwidth = 1) +
scale_y_continuous(limits = c(0, 1), labels = percent) +
ylab("Percent") + xlab(paste(name.variable[i])) +
ggtitle(paste("Histogram of", name.variable[i], "in PS. no", name.PS.no[j]))
print(g) # inside a loop under knitr each plot must be printed explicitly
}
}
#head(data)
# Select only the variables related to the injury profiles: the block of
# columns running from "dhx" through "rtx".
start.IP <- "dhx"
end.IP <- "rtx"
start.col.IP <- match(start.IP, names(data))
end.col.IP <- match(end.IP, names(data))
IP.data <- data[start.col.IP:end.col.IP]

#==== pre-process before computing correlations ====
# Correlations are undefined for constant columns, so drop any injury
# variable with zero variance.
injury_var <- apply(IP.data, 2, var, na.rm = TRUE)
IP.data <- IP.data[, injury_var != 0]

# Label every field with its country + PS-cluster combination, e.g. "PHL_1".
groups <- paste(data$country, data$PS.no, sep = "_")
IP.data <- cbind(groups, IP.data)

# Remaining NAs are treated as "injury absent".
IP.data[is.na(IP.data)] <- 0

# The distinct group labels drive the per-group loops below.
trts <- as.vector(unique(IP.data$groups))
The analysis presented here is designed to capture the co-occurrence patterns within the injury profiles. It follows an approach designed to test for differences in co-occurrence patterns at the community level across ecosystems, to identify modules of co-occurring organisms within communities, and to identify pairwise co-occurrence patterns within modules that are consistent across ecosystems. We considered co-occurrence to be positive rank correlations (Spearman's correlation) between pairs of variables within each dataset, with the strength of the relationship represented by the correlation coefficient. Negative correlations (indicative of either competitive interactions or non-overlapping niches; Faust and Raes, 2012) were also included in this analysis, though they were a small subset of the combined datasets. We only considered negative and positive co-occurrence relationships whose correlation strength (i.e., ρ from Spearman's correlation) was less than or equal to −0.5 (or −0.75), or greater than or equal to 0.5 (or 0.75).
Spearman's correlation was used because it only checks whether two variables are monotonically related, rather than requiring a linear relationship. As a result it is less sensitive to differences in abundance, which is desirable given how the survey values were collected. Co-occurrence was assessed using a previously described method.
#=====co_occurrence_pairwise.R====
# Pairwise Spearman co-occurrence between every pair of injury variables,
# computed separately within each country x PS-cluster group.
results <- matrix(nrow = 0, ncol = 7) # one row per (group, pair): trt, taxa1, taxa2, rho, p, ab1, ab2
# BUG FIX: the option that silences warnings is `warn`, not `warnings`;
# options(warnings = -1) was a silent no-op.
options(warn = -1)
for(a in 1:length(trts)){
  # subset the data to one country/production-situation group
  trt.temp <- trts[a]
  temp <- subset(IP.data, groups == trt.temp)
  # column 1 is the group label, so the injury columns start at 2
  for(b in 2:(dim(temp)[2]-1)){
    # compare every variable with every variable after it (upper triangle)
    for(c in (b+1):(dim(temp)[2])){
      # total "abundance" of each injury variable within the group
      species1.ab <- sum(temp[,b])
      species2.ab <- sum(temp[,c])
      # only correlate when both columns carry real signal
      if(species1.ab >1 & species2.ab >1){
        # exact = FALSE: ties make exact Spearman p-values impossible and
        # would trigger warnings (see ?cor.test).
        # BUG FIX: dropped `na.action = na.rm` -- na.rm is not a function,
        # and cor.test() has no na.action argument; it was silently
        # swallowed by `...`.
        test <- cor.test(temp[,b], temp[,c], method = "spearman", exact = FALSE)
        rho <- test$estimate
        p.value <- test$p.value
      }
      # degenerate columns get a neutral result instead of a test
      if(species1.ab <=1 | species2.ab <= 1){
        rho<-0
        p.value<-1
      }
      new.row <- c(trts[a], names(temp)[b], names(temp)[c], rho, p.value, species1.ab, species2.ab)
      results <- rbind(results, new.row)
    }
  }
  print(a/length(trts)) # progress: fraction of groups processed
}
## [1] 0.1111111
## [1] 0.2222222
## [1] 0.3333333
## [1] 0.4444444
## [1] 0.5555556
## [1] 0.6666667
## [1] 0.7777778
## [1] 0.8888889
## [1] 1
# Convert the accumulated character matrix into a typed data frame.
# BUG FIX: the old data.frame(data.matrix(results)) only worked because
# data.matrix() is a pass-through for matrix input; applied to a data
# frame it would have coerced the taxa name columns to numbers.  Convert
# directly and keep strings as strings.
results <- as.data.frame(results, stringsAsFactors = FALSE)
names(results) <- c("trt","taxa1","taxa2","rho","p.value","ab1","ab2")
# give each variable its intended type (the redundant
# as.character(as.factor(...)) round-trips are gone)
results$trt <- as.factor(results$trt)
results$taxa1 <- as.character(results$taxa1)
results$taxa2 <- as.character(results$taxa2)
results$rho <- as.numeric(results$rho)
results$p.value <- as.numeric(results$p.value)
results$ab1 <- as.numeric(results$ab1)
results$ab2 <- as.numeric(results$ab2)
head(results)
## trt taxa1 taxa2 rho p.value ab1 ab2
## 1 PHL_1 dhx whx 0.06524012 0.6891879742 1307 2089
## 2 PHL_1 dhx ssx -0.12272044 0.4506097948 1307 109
## 3 PHL_1 dhx defa 0.18989475 0.2405423584 1307 1941
## 4 PHL_1 dhx bphx -0.11699678 0.4721633481 1307 1286
## 5 PHL_1 dhx wbpx -0.50743562 0.0008316712 1307 84
## 6 PHL_1 dhx awx 0.00000000 1.0000000000 1307 1
# Keep only the variable pairs with |rho| > 0.25 as network edges.
# rho is already numeric at this point, so the previous
# as.numeric(as.character(abs(rho))) round-trip was redundant.
results_sub <- subset(results, abs(rho) > 0.25)
results_sub.by.group <- list()
# BUG FIX: removed the duplicated `name.groups <- name.groups <-` assignment.
name.groups <- c("IDN_1", "IDN_2", "IND_1", "IND_2", "THA_1", "THA_2", "VNM_1", "VNM_2", "PHL_1") #sort(as.vector(unique(groups)))
# one edge list per country x PS-cluster group
for(i in seq_along(name.groups)){
  results_sub.by.group[[i]] <- subset(results_sub, trt == name.groups[i])
}
# head(results_sub.by.group[[1]][,2:3]) # get the list
#layout(matrix(c(1:14), 9, 2, byrow = TRUE))
# Build and draw one undirected co-occurrence network per group.
# Vertices are injury variables; edges are the retained pairs (|rho| > 0.25).
# NOTE: igraph's plot() uses base graphics, so par(mfrow) applies here.
par(mfrow = c(1,2))
g <- list()
for(i in 1:length(name.groups)){
# taxa1/taxa2 columns form the edge list of the i-th group's network
g[[i]] <- graph.edgelist(as.matrix(results_sub.by.group[[i]][, 2:3]), directed = FALSE)
#== layout: place vertices on a circle
l <- layout.circle(g[[i]])
#== vertex appearance
V(g[[i]])$color <- "tomato"
V(g[[i]])$frame.color <- "gray40"
V(g[[i]])$shape <- "circle"
V(g[[i]])$size <- 25
V(g[[i]])$label.color <- "white"
V(g[[i]])$label.font <- 2
V(g[[i]])$label.family <- "Helvetica"
V(g[[i]])$label.cex <- 0.7
#== edge appearance: width scales with rho, color encodes its sign
E(g[[i]])$weight <- as.matrix(results_sub.by.group[[i]][, 4])
E(g[[i]])$width <- 1 + E(g[[i]])$weight*5
# firebrick for negative correlations, green for positive
col <- c("firebrick3", "forestgreen")
colc <- cut(results_sub.by.group[[i]]$rho, breaks = c(-1, 0, 1), include.lowest = TRUE)
E(g[[i]])$color <- col[colc]
#== plot network model
plot(g[[i]], layout = l * 1.0, main = paste( "network model of", name.groups[i]))
}
# Per-vertex centrality metrics for each group's network.
network.value <- list()
for(i in 1:length(g)){
# use |rho| as the edge weight so the weighted metrics are well defined
E(g[[i]])$weight <- as.matrix(abs(results_sub.by.group[[i]][, 4]))
network.value[[i]] <- data.frame(
id = V(g[[i]])$name,
deg = degree(g[[i]]), # degree
bet = betweenness(g[[i]]), # betweenness
clo = closeness(g[[i]]), # closeness
eig = evcent(g[[i]])$vector,# eigenvector centrality
cor = graph.coreness(g[[i]]), # coreness
tra = transitivity(g[[i]], type = c("local")) # local clustering coefficient
)
# residual of eigenvector ~ betweenness, used by the key-actor plot below
network.value[[i]]$res <- as.vector(lm(eig ~ bet, data = network.value[[i]])$residuals)
network.value[[i]]$group <- name.groups[i]
}
# Stack the per-group metric tables into one data frame (rbind of data
# frames already yields a data frame) and reshape to long format with id
# and group kept as identifier variables.
net.vertice.data <- do.call(rbind, network.value)
row.names(net.vertice.data) <- NULL
for (fac in c("id", "group")) {
  net.vertice.data[[fac]] <- as.factor(net.vertice.data[[fac]])
}
m.net.vertice.data <- melt(net.vertice.data)
## Using id, group as id variables
# compare degree
m.net.vertice.data %>% filter(variable == "deg") %>%
ggplot(aes(x= value, y = id)) +
geom_point(size = 3, color ="blue") +
facet_wrap(~group) +
theme_bw() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(color = "grey", linetype = 3)) +
ggtitle("Degree of each node in the injury profile network")
# Dot plot of every node's betweenness, one facet per group.
m.net.vertice.data %>% filter(variable == "bet") %>%
ggplot(aes(x= value, y = id)) +
geom_point(size = 3, color ="blue") +
facet_wrap(~group) +
theme_bw() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(color = "grey", linetype = 3)) +
ggtitle("Betweenness of each node in the injury profile network")
# NOTE(review): this block is an exact duplicate of the betweenness plot
# above (same filter, same title).  It was presumably meant to show a
# different metric -- kept as-is to preserve the report's output.
m.net.vertice.data %>% filter(variable == "bet") %>%
ggplot(aes(x= value, y = id)) +
geom_point(size = 3, color ="blue") +
facet_wrap(~group) +
theme_bw() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(color = "grey", linetype = 3)) +
ggtitle("Betweenness of each node in the injury profile network")
# Dot plot of every node's closeness, one facet per group.
m.net.vertice.data %>% filter(variable == "clo") %>%
ggplot(aes(x= value, y = id)) +
geom_point(size = 3, color ="blue") +
facet_wrap(~group) +
theme_bw() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(color = "grey", linetype = 3)) +
ggtitle("Closeness of each node in the injury profile network")
# Dot plot of eigenvector centrality, nodes ordered by value within each facet.
m.net.vertice.data %>% filter(variable == "eig") %>%
ggplot(aes(x= value, y = reorder(id, value))) +
geom_point(size = 3, color ="blue") +
facet_wrap(~group) +
theme_bw() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(color = "grey", linetype = 3)
) +
ggtitle("Eigenvector of each node in the injury profile network")
# Dot plot of coreness (variable == "cor"), nodes ordered by value.
m.net.vertice.data %>% filter(variable == "cor") %>%
  ggplot(aes(x = value, y = reorder(id, value))) +
  geom_point(size = 3, color = "blue") +
  facet_wrap(~group) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", linetype = 3)
  ) +
  # BUG FIX: this panel plots coreness, but its title said "Eigenvector"
  # (copy-paste from the previous plot).
  ggtitle("Coreness of each node in the injury profile network")
One method is to plot (regress) eigenvector centrality on betweenness and examine the residuals.
A variable or node with high betweenness and low eigenvector centrality may be an important gatekeeper to a central actor.
A variable or node with low betweenness and high eigenvector centrality may have unique access to central actors.
# Key-actor analysis: eigenvector centrality against betweenness, with
# color and size encoding the residual of the eig ~ bet regression.
# BUG FIX: the old layout(matrix(c(1:9), 5, 2, ...)) call warned ("data
# length [9] is not a sub-multiple of the number of rows [5]") and had no
# effect anyway, because layout() does not apply to ggplot2 output; it
# has been removed.
for(i in 1:length(network.value)){
  plot <- ggplot(
    network.value[[i]], aes(x = bet, y = eig,
                            label = rownames(network.value[[i]]),
                            color = res,
                            size = abs(res))) +
    xlab("Betweenness Centrality") +
    # BUG FIX: corrected the axis-label typo "Eigencvector"
    ylab("Eigenvector Centrality") +
    geom_text() +
    # BUG FIX: corrected the title typo "Injuiry"
    ggtitle(paste("Key Actor Analysis for Injury Profiles of", name.groups[i]))
  print(plot)
}
# dd <- degree.distribution(g[[1]], cumulative = T, mode = "all")
# plot(dd, pch = 19, cex = 1, col = "orange", xlab = "Degree", ylab = "Cumulative Frequesncy")
# distances(g[[1]])
# #shortest_paths(g[[1]], 5)
# all_shortest_paths(g[[1]], 1, 6:8)
# mean_distance(g[[1]])
# hist(degree(g[[1]]), col = "lightblue", xlim = c(0, 10), xlab = "Vertex Degree", ylab = "Frequency", main = "")
# hist(graph.strength(g[[1]]), col = "pink", xlim = c(0, 5), xlab = "Vertex strength", ylab = "Frequency", main = "")
#global.prop <- NULL
#i <-1
#for(i in 1:length(g)){
#adj.mat <- as.matrix(get.adjacency(g[[i]], attr = "weight"))
#new.global.prop <- as.data.frame(fundamentalNetworkConcepts(adj.mat))
#names(new.global.prop) <- name.groups[i]
#global.prop <- c(global.prop, new.global.prop)
#net.result <- fundamentalNetworkConcepts(mat[[i]])
#conformityBasedNetworkConcepts(mat)
#}
#sum.global <- as.data.frame(do.call("cbind", global.prop))
#rownames(sum.global) <- c("Density", "Centralization", "Heterogeneity", "Mean ClusterCoef", "Mean Connectivity")
# cleate all the possible pair of the variables
# IP.list <- NULL
# for(i in 2:(dim(IP.data)[2]-1)){
# for(j in (i+1):(dim(IP.data)[2])){
# new.row <- c(names(IP.data)[i], names(IP.data)[2 + j])
# IP.list <- rbind(IP.list, new.row)
# }
# }
#
# IP.list <- as.data.frame(IP.list)
# names(IP.list) <- c("var1", "var2")
# IP.list$rho <- 0
# results$newrho <- ifelse(abs(results$rho) > 0.25, results$rho, 0)
#
# IDN1 <- results %>% filter(trt == "IDN_1")
# IDN2 <- results %>% filter(trt == "IDN_2")
# IND1 <- results %>% filter(trt == "IND_1")
# IND2 <- results %>% filter(trt == "IND_2")
# THA1 <- results %>% filter(trt == "THA_1")
# THA2 <- results %>% filter(trt == "THA_2")
# VNM1 <- results %>% filter(trt == "VNM_1")
# VNM2 <- results %>% filter(trt == "VNM_1")
# PHL1 <- results %>% filter(trt == "PHL_1")